import requests
from bs4 import BeautifulSoup
import csv

def _parse_details(paragraph_text):
    """Split a movie's info paragraph into ``(year, area, genre)``.

    Only the last non-empty line of *paragraph_text* is inspected. It is split
    on ``/`` and the first three fields are returned with surrounding
    whitespace removed. When fewer than three fields are present, every value
    falls back to a single space.
    """
    # assumes the last line of the paragraph reads "year / area / genre"
    # -- TODO confirm against the live page markup.
    lines = [line.strip() for line in paragraph_text.splitlines() if line.strip()]
    if lines:
        fields = [field.strip() for field in lines[-1].split('/')]
        if len(fields) >= 3:
            # NOTE(review): a line with more than three '/'-separated fields
            # keeps only the first three -- verify this suits every entry.
            return fields[0], fields[1], fields[2]
    return ' ', ' ', ' '


def _extract_comment_count(movie):
    """Return the rating-count text found inside *movie*, or ``''`` if none."""
    # First try a span whose text is exactly the label; the value is then read
    # from the node that follows it, minus its last three characters.
    label = movie.find('span', string='评价')
    if label is not None:
        sibling = label.next_sibling
        # The label may be the last node of its parent, in which case there
        # is nothing to read the count from.
        return sibling.text[:-3] if sibling is not None else ''

    # NOTE(review): presumably the count and the label can share one span
    # (text ending in "评价"); the same three-character suffix is dropped
    # from it -- confirm against the live page markup.
    combined = movie.find(
        'span',
        string=lambda s: s is not None and s.strip().endswith('评价'),
    )
    if combined is None:
        return ''
    return combined.string.strip()[:-3]


def get_movie_info(url, timeout=10):
    """Download one listing page and yield a row for every movie on it.

    This is a generator, so the HTTP request is only sent once iteration
    starts.

    Args:
        url: Address of the listing page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Yields:
        ``[num, title, year, area, genre, rating_num, comment_num, href]``
        for each ``li`` inside the page's ``ol.grid_view`` element. Nothing is
        yielded when the page has no such list.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error instead of an AttributeError further down.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')
    movie_list = soup.find('ol', class_='grid_view')
    if movie_list is None:
        return

    for movie in movie_list.find_all('li'):
        num = movie.find('em').text
        title = movie.find('span', class_='title').text
        info_paragraph = movie.find('div', class_='bd').find('p')
        year, area, genre = _parse_details(info_paragraph.text)
        rating_num = movie.find('span', class_='rating_num').text
        comment_num = _extract_comment_count(movie)
        href = movie.find('a')['href']

        yield [num, title, year, area, genre, rating_num, comment_num, href]

def save_to_csv(movies, filename):
    """Write *movies* to *filename* as a UTF-8 CSV file with a header row.

    Args:
        movies: Iterable of rows; each row is written as one CSV record.
        filename: Path of the file to create. It is opened in ``'w'`` mode,
            so existing content is replaced.
    """
    header = ('num', 'title', 'year', 'area', 'genre', 'rating_num', 'comment_num', 'href')
    # newline='' leaves line-ending handling to the csv writer.
    with open(filename, 'w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(header)
        csv_writer.writerows(movies)

if __name__ == '__main__':
    # Each page is addressed by appending an offset to the "start" parameter;
    # the offsets run 0, 25, ..., 225.
    page_prefix = 'https://movie.douban.com/top250?start='
    collected_rows = []

    for offset in range(0, 250, 25):
        # get_movie_info returns a generator; += drains it into the list.
        collected_rows += get_movie_info(page_prefix + str(offset))

    save_to_csv(collected_rows, 'top250-1.csv')